home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Shareware Grab Bag
/
Shareware Grab Bag.iso
/
007
/
tools.c
< prev
next >
Wrap
C/C++ Source or Header
|
1985-06-03
|
14KB
|
592 lines
#include "b:stdio.h"
#include "b:grep.h"
/*
* This module contains the utilities for use by the GREP
* routine to match patterns. Routines are in alphabetical
* order.
*/
int amatch( lin , pat, boln)
char *lin, *boln ;
TOKEN *pat ;
{
/* Scans throught the pattern template looking for a match
* with lin. Each element of lin is compared with the template
* until either a mis-match is found or the end of the tempalte
* is reached. In teh formaer cas a zero is returned; in the
* latter, a pointer into lin ( pointing to the last character in
* in the matched pattern) is returned .
*
* 'lin' is a pointer to the line being searched
* 'pat' is a pointer to the template made by matchpat()
* 'boln' is a pointer into 'lin' which points to the character
* at the beginning of the line.
*
*/
register char *bocl, *rval, *strstart ;
if( pat == 0)
return(0) ;
strstart = lin ;
while( pat ) {
if( pat->tok == CLOSURE && pat->next ) {
/* Process a closure: First skip over the closure token
* to the object to be repeated
*/
pat = pat->next ;
/* Now match as many occurances of the closure pattern as
* possible
*/
bocl = lin ;
while( *lin && omatch(&lin, pat) )
;
/* 'lin' now points to the character that mad us fail. Now
* go on to process the rest of the string. A problem here is
* a character following the closure which could have been in
* the closure. For example, in the pattern '[a-z]*t', the final
* 't' would have been sucked up while in the loop. So if the
* match fails, we back up a notch a try to match the rest of
* the string again, repeating the process recursively until we
* get to the beginning of the closure. The recursion goes
* at most two levels deep.
*/
if(pat = pat->next) {
while( bocl <= lin ) {
if( rval = amatch(lin, pat, boln) ) {
return( rval ) ; /* success */
}
else
--lin ;
}
return(0) ; /* matched failed */
}
}
else if ( omatch( &lin, pat, boln) ) {
pat = pat->next ;
}
else {
return( 0 ) ;
}
}
/* Note that omatch() advances lin to point at the next character
* to be matched; consequently, when we reach the end of the template,
* lin will be pointing at the character following the last character
* matched. The exceptions are tempaltes containing only a BOl or EOL
* token. In these cases omatch doesn't advance.
*
* So, decrement lin to make it point at the end of the matched string
* as long as this doesn't take us past the beginning of the string.
*/
return( max(strstart, --lin) ) ;
}
/*---------------------------------------------------------------*/
delete( ch, str )
int ch ;
register char *str ;
{
/* delete the first occurance of the character from the string
* and move everything else over a notch to fill the hole
*/
ch &= 0xff ;
while( *str && *str != ch)
str++ ;
while( *str ) {
*str = *(str+1) ;
str++ ;
}
}
/*--------------------------------------------------------------*/
int dodash( delim, src, dest, maxccl )
int delim, maxccl ;
char **src, *dest ;
{
/* Expand the set pointed to by *src into dest. Stop at delim.
* Return 0 on error or the size of the character class on success.
* Update *src to point at delim. A set can have on element {x} or
* several elements ( {abcdefghijklmn} and {a-n} are equivalent).
* Note: the dash notation is expanded as sequential numbers. This
* means that the set {a-Z} will contain the extra characters [\]^_ and
* ` . The maximum number of characters is defined by maxccl
*/
register char *dstart ;
register int k, at_begin ;
char *sptr, ctest ;
dstart = dest ;
sptr = *src ;
at_begin = 1 ;
while( *sptr && (*sptr != delim ) && (dest-dstart < maxccl) ) {
if( *sptr == ESCAPE ) { /* handle escape \ characters*/
*dest++ = esc( &sptr ) ;
sptr++ ;
}
else if ( *sptr != '-' ) { /* handle normal characters */
*dest++ = *sptr++ ;
}
else if ( at_begin || *(sptr+1) == delim ) {
*dest++ = '-' ; /* literal '-' */
}
else if ( *(sptr-1) <= *(sptr+1) ) {
sptr++ ;
for( k = *(sptr-2) ; ++k <= *sptr ; )
*dest++ = k ;
sptr++ ;
}
else {
return(0) ;
}
at_begin = 0 ;
}
*dest++ = '\000' ;
*src = sptr ;
return( dest - dstart ) ;
}
/*-----------------------------------------------------------------*/
int esc(s)
char **s ;
{
register int rval ;
/* Map escape sequences onto their actual ASCII values. If no
* escape prefix is present, s is untouched and *s is returned,
* otherwise **s is advanced to point to the escaped character
* and the translated character is returned
*/
if ( **s != ESCAPE ) {
rval = **s ;
}
else {
(*s)++ ;
switch( toupper(**s) ) {
case '\000' : rval = ESCAPE ; break ;
case 'S' : rval = ' ' ; break ;
case 'N' : rval = '\n' ; break ;
case 'T' : rval = '\t' ; break ;
case 'B' : rval = '\b' ; break ;
case 'R' : rval = '\r' ; break ;
default : rval = **s ; break ;
}
}
return( rval) ;
}
/*----------------------------------------------------------------*/
TOKEN *getpat( arg )
char *arg ;
{
/* Translate arg into a token string */
return( makepat( arg, '\000' ) ) ;
}
/*-----------------------------------------------------------------*/
insert( ch, str )
int ch ;
register char *str ;
{
/* Insert ch into string at the place pointed to by str. Move
* everything else over a notch
*/
register char *bp ;
bp = str ;
while( *str ) /* find the end of the string */
str++ ;
do { /* move the string over a notch */
*(str+1) = *str ;
str-- ;
} while ( str >= bp ) ;
*bp = ch ;
}
/*------------------------------------------------------------------*/
char *in_string( delim, str )
register int delim ;
register char *str ;
{
/* return a pointer to delim if it is in the string, 0 otherwise */
delim &= 0x7f ;
while( *str && *str != delim )
str++ ;
return( *str ? str : 0 ) ;
}
/*-------------------------------------------------------------------*/
int isalphanum(c)
int c ;
{
return( ('a' <= c && c <= 'z') ||
('A' <= c && c <= 'Z') ||
('0' <= c && c <= '9') ) ;
}
/*--------------------------------------------------------------------*/
TOKEN *makepat( arg, delim )
char *arg ;
int delim ;
{
/* Make a pattern template from the string pointed to by arg. Stop
* when delim or '\000' or '\n' is found is arg. Reurn a pointer to
* the pattern template.
*
* The pattern template used here is somewhat different from those
* used in the book; each token is a structure of the form TOKEN.
* A token consists of an identifier, a pointer to a string, a literal
* character and a pointer to the next token. This last is 0 if there is
* no additional token.
*
*/
TOKEN *head, *tail ;
TOKEN *ntok ;
char buf[CLS_SIZE] ;
int error ;
/* Check for characters thatt aren't legal at the begiining of a token */
if ( *arg == '\0' || *arg == delim || *arg == '\n' || *arg == CLOSURE)
return(0) ;
error = 0 ;
head = 0 ;
tail = 0 ;
while( *arg && *arg != delim && *arg != '\n' && !error ) {
ntok = malloc ( TOKSIZE ) ;
ntok->string = &(ntok->lchar) ;
ntok->lchar = '\000' ;
ntok->next = 0 ;
switch( *arg ) {
case ANY :
ntok->tok = ANY ;
break ;
case BOL :
if( head == 0 ) /* this is the first symbol */
ntok->tok = BOL ;
else
error = 1 ;
break ;
case EOL :
if( *(arg+1) == delim || *(arg+1) == '\000' || *(arg+1) == '\n' )
ntok->tok = EOL ;
else
error = 1 ;
break ;
case CCL :
if ( *(arg+1) == NEGATE ) {
ntok->tok = NCCL ;
arg += 2 ;
}
else {
ntok->tok = CCL ;
arg++ ;
}
error = dodash( CCLEND, &arg, buf, CLS_SIZE) ;
if ( error != 0 ) {
ntok->string = malloc( error ) ;
strcpy( ntok->string, buf ) ;
error = 0 ;
}
break ;
case CLOSURE:
if( head != 0 ) {
switch( tail->tok ) {
case BOL:
case EOL:
case CLOSURE:
return(0) ;
default:
ntok->tok = CLOSURE ;
}
}
break ;
default:
ntok->tok = LITCHAR ;
ntok->lchar = esc( &arg ) ;
}
if( error || ntok == 0 ) {
unmakepat( head ) ;
return( 0 ) ;
}
else if ( head == 0 ) {
ntok->next = 0 ; /* this is the first node in the chain */
head = tail = ntok ;
}
else if ( ntok->tok != CLOSURE ) {
tail->next = ntok ; /* Insert at the end of the list */
ntok->next = tail ;
tail = ntok ;
}
else if( head != tail ) {
(tail->next)->next = ntok ; /* More than one node in the chain. */
ntok->next = tail ; /* Insert the CLOSURE node immediately */
/* in front of the tail */
}
else {
ntok->next = head ; /* Only one node in the chain, Insert */
tail->next = ntok ; /* node at the head of the linked list*/
head = ntok ;
}
arg++ ;
}
tail->next = 0 ;
return( head ) ;
}
/*-----------------------------------------------------------------------*/
char *matchs( line, pat, ret_endp )
char *line ;
TOKEN *pat ;
int ret_endp ;
{
/* Compares line and pattern. Line is a character string while pat
* is a pattern template made by getpat(). Returns:
* (1) a 0 if no match is found
* (2) a pointer to the last character satisfying the match if
* ret_endp is non-zero
* (3) A pointer to the beggining of the matched string if ret_endp
* is non-zero
*/
char *rval, *bptr ;
bptr = line ;
while( *line ) {
if( (rval = amatch(line, pat, bptr)) == 0 ) {
line++ ;
}
else {
rval = ret_endp ? rval : line ;
break ;
}
}
return(rval) ;
}
/*--------------------------------------------------------------------------*/
stoupper( str )
char *str ;
{
/* Map the entire string pointed to by str to upper case */
char *rval ;
rval = str ;
while( *str ) {
if( 'a' <= *str && *str <= 'z' )
*str -= ('a' - 'A') ;
str++ ;
}
}
/*---------------------------------------------------------------------------*/
int max( x, y )
int x, y ;
{
return( (x>y) ? x : y ) ;
}
/*----------------------------------------------------------------------------*/
int omatch( linp, pat, boln)
char **linp, *boln ;
TOKEN *pat ;
{
/* Match one pattern element, pointed at by pat, with the character
* at **linp. Return non-zero on match. Otherwise return 0. Linp is
* advanced to skip over the matched cahracter; it is not advanced on
* failure. The amount of teh match is 0 for matches to null strings,
* or 1 otherwise. "boln" should point at the position at the position
* that will match a BOL token.
*/
register int advance ;
advance = -1 ;
if( **linp ) {
switch( pat->tok ) {
case LITCHAR:
if( **linp == pat->lchar )
advance = 1 ;
break ;
case BOL:
if( **linp == boln )
advance = 0 ;
break ;
case ANY:
if( **linp != '\n' )
advance = 1 ;
break ;
case EOL:
if( **linp == '\n' )
advance = 0 ;
break ;
case CCL:
if( in_string( **linp, pat->string) )
advance = 1 ;
break ;
case NCCL:
if( ! in_string( **linp, pat->string) )
advance = 1 ;
break ;
default:
printf("omatch: can't happen\n") ;
}
}
if( advance >= 0 )
*linp += advance ;
return( ++advance ) ;
}
/*---------------------------------------------------------------------*/
pr_line( ln )
register char *ln ;
{
for( ; *ln ; ln++ ) {
if( (' ' <= *ln) && (*ln <= '~') )
putchar( *ln ) ;
else {
printf("\\0x%02x", *ln) ;
if( *ln == '\n' )
putchar( '\n' ) ;
}
}
}
/*--------------------------------------------------------------------*/
pr_tok( head )
TOKEN *head ;
{
register char *str ;
for ( ; head ; head = head->next ) {
switch( head->tok ) {
case BOL:
str = "BOL" ;
break ;
case EOL:
str = "EOL" ;
break ;
case ANY:
str = "ANY" ;
break ;
case LITCHAR:
str = "LITCHAR" ;
break ;
case ESCAPE:
str = "ESCAPE" ;
break ;
case CCL:
str = "CCL" ;
break ;
case CCLEND:
str = "CCLEND" ;
break ;
case NEGATE:
str = "NEGATE" ;
break ;
case NCCL:
str = "NCCL" ;
break ;
case CLOSURE:
str = "CLOSURE" ;
break ;
default:
str = "***** unknown *****" ;
break ;
}
printf("%-8s at: 0x%x, ",str, head ) ;
if ( head->tok == CCL || head->tok == NCCL )
printf("string = [ %s ] =, ",head->string) ;
else if (head->tok == LITCHAR )
printf("lchar = %c, ",head->lchar) ;
printf("next = 0x%x\n", head->next) ;
}
putchar('\n') ;
}
/*-----------------------------------------------------------------------*/
unmakepat( head )
TOKEN *head ;
{
register TOKEN *old_head ;
while( head ) {
switch (head->tok) {
case CCL:
case NCCL:
free(head->string) ;
default:
old_head = head ;
head = head->next ;
free(old_head) ;
break ;
}
}
}